###
### Appendix A
###
# Load the candidate data; the code below uses the `sntv` and `smdp` data
# frames it provides.
load("JHRED2014_New.RData")

# Stack both data sets and keep one row per distinct combination of the
# candidate attributes used in Figure A.1.
candidate_cols <- c("candID", "ken", "female", "byear", "kakusu", "party_id")
candidates <- unique(rbind(sntv, smdp)[, candidate_cols])

# Recode gender (0 = "Male", anything else = "Female"; NA stays NA).
candidates$female <- ifelse(candidates$female == 0, "Male", "Female")

# Go through character so that a factor-coded year yields the year itself
# rather than the internal factor code.
candidates$byear <- as.numeric(as.character(candidates$byear))

# Decade index of the birth year: 1 = [1850, 1860), ..., 14 = [1980, 1990).
# Years outside 1850-1989 (and missing years) are NA.
decade_starts <- seq(1850, 1980, by = 10)
candidates$byear2 <- cut(candidates$byear,
                         breaks = c(decade_starts, 1990),
                         right = FALSE, labels = FALSE)

# Figure A.1
#pdf("gbp_dist.pdf", width=7, height=5)
# Two panels on the top row, one centred panel on the bottom row.
layout(matrix(c(1, 1, 1, 1, 2, 2, 2, 2,
                0, 0, 3, 3, 3, 3, 0, 0), nrow = 2, byrow = TRUE))
boxplot(candidates$kakusu ~ candidates$female, main = "Gender")
# boxplot() draws one box per decade index that actually occurs, so label
# exactly those decades instead of assuming all 14 are present.
decades_present <- sort(unique(candidates$byear2))
boxplot(candidates$kakusu ~ candidates$byear2, main = "Birth Year (by Decade)",
        names = decade_starts[decades_present])
boxplot(candidates$kakusu ~ candidates$ken, main = "Prefecture (N = 47)")
#dev.off()

library(lme4)
# Data for Table A.1; the models below read the data frame `df` from it.
load("CatalinacReplicationKakusu.RData")

# Express birth year as the distance from the earliest observed birth year.
earliest_birth <- min(df$birthy, na.rm = TRUE)
df$birthy <- df$birthy - earliest_birth

# Table A.1
# Linear mixed models fitted with lmer(): three specifications for `pork`
# followed by the same three for `sep_theta`. Specification 1 has a random
# intercept for `name`; specifications 2 and 3 add covariates and a random
# intercept for `pty`.

# -- outcome: pork --
pork1 <- lmer(
  pork ~ kakusu + (1|name),
  data = df
)
summary(pork1)

pork2 <- lmer(
  pork ~ kakusu + length + female + birthy + (1|name) + (1|pty),
  data = df
)
summary(pork2)

pork3 <- lmer(
  pork ~ kakusu + length + female + birthy + m + factor(year) +
    (1|name) + (1|pty),
  data = df
)
summary(pork3)

# -- outcome: sep_theta --
ideology1 <- lmer(
  sep_theta ~ kakusu + (1|name),
  data = df
)
summary(ideology1)

ideology2 <- lmer(
  sep_theta ~ kakusu + length + female + birthy + (1|name) + (1|pty),
  data = df
)
summary(ideology2)

ideology3 <- lmer(
  sep_theta ~ kakusu + length + female + birthy + m + factor(year) +
    (1|name) + (1|pty),
  data = df
)
summary(ideology3)




###
### Appendix B
###
# Name comparison data; the code below reads the data frame `comparison`
# (columns year, candidate, manifesto, kakusu.cand, kakusu.man).
load("Manifesto_JHRED_Comparison.RData")

years <- unique(comparison$year)

# calculate the proportion of name match between JHRED and
# manifesto names by election-year.
# For each pair of names, count the characters of the JHRED name
# (`candidate`) that also occur anywhere in the paired manifesto name
# (`manifesto`); the yearly value is the total matched characters divided
# by the total characters of the JHRED names in that year.
# The result has one entry per element of `years` (not a fixed length).
match.prop <- vapply(seq_along(years), function(i) {
  in_year <- comparison$year == years[i]

  # as.character() keeps strsplit() working if the names are stored as factors
  jhred_chars <- strsplit(as.character(comparison$candidate[in_year]), "")
  manifesto_chars <- strsplit(as.character(comparison$manifesto[in_year]), "")

  n_chars <- lengths(jhred_chars)
  n_matched <- vapply(seq_along(jhred_chars), function(j) {
    sum(jhred_chars[[j]] %in% manifesto_chars[[j]])
  }, integer(1))

  sum(n_matched) / sum(n_chars)
}, numeric(1))

# calculate correlation between JHRED and manifesto names by election-year
# (correlation of kakusu.cand with kakusu.man, using complete pairs only)
name.cor <- vapply(seq_along(years), function(i) {
  in_year <- comparison$year == years[i]
  cor(comparison$kakusu.cand[in_year], comparison$kakusu.man[in_year],
      use = "pairwise.complete.obs")
}, numeric(1))

# Figure B.1
# Two side-by-side panels with a shared x axis (election year) and a y axis
# fixed to [0, 1]: the yearly match proportion and the yearly correlation.
#pdf("name_valid.pdf", width=8, height=5)
par(mfrow = c(1, 2), mar = c(5.1, 5.1, 2.1, 2.1))

# Draw one connected-points panel and overlay the default grid.
draw_validation_panel <- function(x, y, symbol, title, y_label) {
  plot(x, y, type = "o", pch = symbol, ylim = c(0, 1),
       main = title,
       xlab = "Election Year", ylab = y_label)
  grid()
}

draw_validation_panel(
  years, match.prop,
  symbol = 19,
  title = "Match Proportion",
  y_label = "Match Proportion between\nJHRED Names and Manifesto Names"
)
draw_validation_panel(
  years, name.cor,
  symbol = 17,
  title = "Correlation",
  y_label = "Correlation between\nJHRED Names and Manifesto Names"
)
#dev.off()




###
### Appendix C
###
library(stargazer)
# Reload the candidate data (`sntv` and `smdp`).
load("JHRED2014_New.RData")

# Table C.1
# Summary statistics for each data set, omitting the 25th and 75th
# percentiles. The smdp table uses the same columns as the sntv table
# except `ku_m`.
sntv_summary_vars <- c("voteshare", "kakusu", "kakusu.ave", "diff", "diff.ave",
                       "length", "ku_ncand", "ku_m", "kakusu_sd")
smdp_summary_vars <- setdiff(sntv_summary_vars, "ku_m")
omitted_stats <- c("p25", "p75")

stargazer(sntv[, sntv_summary_vars], omit.summary.stat = omitted_stats)
stargazer(smdp[, smdp_summary_vars], omit.summary.stat = omitted_stats)




###
### Appendix D
###
library(lme4)
# Reload the candidate data (`sntv` and `smdp`).
load("JHRED2014_New.RData")

# models with district-year fixed effects
# The same specification is fitted to each data set: one indicator per level
# of distID, plus random intercepts for candID and ptyID.
dist.fe1 <- lmer(
  voteshare ~ kakusu + length + factor(distID) + (1|candID) + (1|ptyID),
  data = sntv
)
summary(dist.fe1)

dist.fe2 <- lmer(
  voteshare ~ kakusu + length + factor(distID) + (1|candID) + (1|ptyID),
  data = smdp
)
summary(dist.fe2)

# subset to LDP candidates under SNTV
# which() keeps only rows where the comparison is TRUE; plain logical
# indexing would turn every row with a missing party_id into an all-NA row.
sntv.ldp <- sntv[which(sntv$party_id == 1), ]

# calculate the number of LDP candidates in the district
# (number of rows of sntv.ldp that share the same distID; rows with a missing
# distID get NA). table() ignores NAs, so missing values cannot inflate the
# counts.
ldp_per_district <- table(sntv.ldp$distID)
sntv.ldp$n_ldp <- as.vector(ldp_per_district[as.character(sntv.ldp$distID)])

# models only with LDP candidates
# Model 1 enters n_ldp additively; model 2 interacts it with kakusu. Both
# include year indicators and a random intercept for candID.
sntv.ldp.only1 <- lmer(voteshare ~ kakusu + length + n_ldp + ku_m + factor(year)
                  + (1|candID),
                  data=sntv.ldp)
summary(sntv.ldp.only1)

sntv.ldp.only2 <- lmer(voteshare ~ kakusu*n_ldp + length + ku_m + factor(year)
                       + (1|candID),
                       data=sntv.ldp)
summary(sntv.ldp.only2)




###
### Appendix E
###

# for models in Table E.2, see Replication01_Main.R




###
### Appendix F
###
# Reload the candidate data (`sntv`).
load("JHRED2014_New.RData")

# subset of unique candidates
# (one row per distinct combination of pid, kakusu, party_id and ku_m)
sntv_first <- unique(sntv[,c("pid", "kakusu", "party_id", "ku_m")])
sntv_first$party_id <- as.numeric(as.character(sntv_first$party_id))

# create party labels
# party_id 1-6 map to the labels below; any other value (or NA) stays NA.
sntv_first$party <- as.character(
  factor(sntv_first$party_id, levels = 1:6,
         labels = c("LDP", "JSP", "Komei", "DSP", "JCP", "Reform"))
)
table(sntv_first$party)

# Table F.1
# One regression of kakusu on ku_m per labelled party.
# The subsets use which() because `party == "..."` is NA for unlabelled
# rows, and indexing with NA would add all-NA rows that lm() then reports as
# observations deleted due to missingness.
p1 <- lm(kakusu ~ ku_m,
         data=sntv_first[which(sntv_first$party=="LDP"),])
summary(p1)

p2 <- lm(kakusu ~ ku_m,
         data=sntv_first[which(sntv_first$party=="JSP"),])
summary(p2)

p3 <- lm(kakusu ~ ku_m,
         data=sntv_first[which(sntv_first$party=="Komei"),])
summary(p3)

p4 <- lm(kakusu ~ ku_m,
         data=sntv_first[which(sntv_first$party=="DSP"),])
summary(p4)

p5 <- lm(kakusu ~ ku_m,
         data=sntv_first[which(sntv_first$party=="JCP"),])
summary(p5)

p6 <- lm(kakusu ~ ku_m,
         data=sntv_first[which(sntv_first$party=="Reform"),])
summary(p6)

# Common last name data; the code below reads the data frame `common_last`
# (columns rank and kakusu).
load("CommonLastNames.RData")

# fit loess
# NOTE: the fitted object is stored under the name `loess`, which is also the
# name of the fitting function; calls to loess() keep working because R skips
# non-function objects when looking up a function.
loess <- loess(kakusu ~ rank, data=common_last)
lp <- predict(loess, common_last, se=TRUE)

# Pointwise 95% band from the standard errors of the fit. `se.fit` is spelled
# out in full rather than relying on partial matching of `lp$se`.
t_crit <- qt(0.975, lp$df)
lwr95 <- lp$fit - t_crit * lp$se.fit
upr95 <- lp$fit + t_crit * lp$se.fit

# Draw the band and the fitted line in rank order, so the polygon and line
# are traced left to right even if the rows are not sorted by rank.
by_rank <- order(common_last$rank)
rank_sorted <- common_last$rank[by_rank]

# Figure F.1
#pdf("common.pdf", width=7, height=5)
par(mar=c(5.3,4.1,2.1,2.1), mfrow =c(1,1))
plot(common_last$rank, common_last$kakusu, pch=1, cex=0.5,
     xlab="Common Last Name Ranking (1-500)", ylab="Last Name Complexity Score")
polygon(c(rank_sorted, rev(rank_sorted)),
        c(lwr95[by_rank], rev(upr95[by_rank])), border=NA,
        col=rgb(0.5,0.5,0.5,0.5))
lines(rank_sorted, lp$fit[by_rank], lwd=2)
#dev.off()




###
### Appendix G
###
library(stargazer)
# District-level data; the code below reads the data frames `sntv_district`
# and `smdp_district`.
load("JHRED2014_Invalid.RData")

# Summary statistics for each data set, omitting the 25th and 75th
# percentiles. The smdp table uses the same columns as the sntv table
# except `ku_m`.
sntv_district_vars <- c("invalid", "kakusu.mean", "ku_ncand", "ku_m")
smdp_district_vars <- c("invalid", "kakusu.mean", "ku_ncand")
omitted_district_stats <- c("p25", "p75")

stargazer(sntv_district[, sntv_district_vars],
          omit.summary.stat = omitted_district_stats)

stargazer(smdp_district[, smdp_district_vars],
          omit.summary.stat = omitted_district_stats)

# for models in Table G.2, see Replication02_InvalidVote.R
 



###
### Appendix H
###

# for models and marginal effect plots (Table H.1 and Figures H.1 and H.2), 
# see Replication03_Interaction.R